{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c36ed41c-fbb9-4e99-a583-46e5898ac9e4",
   "metadata": {},
   "source": [
    "Follow along this tutorial: https://github.com/alexeygrigorev/rag-agents-workshop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c5f2c47-4552-4620-bc04-c484617aa597",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install minsearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a2f94620-4db6-4154-915c-cce211e5b305",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests \n",
    "\n",
    "docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'\n",
    "docs_response = requests.get(docs_url)\n",
    "documents_raw = docs_response.json()\n",
    "\n",
    "documents = []\n",
    "\n",
    "for course in documents_raw:\n",
    "    course_name = course['course']\n",
    "\n",
    "    for doc in course['documents']:\n",
    "        doc['course'] = course_name\n",
    "        documents.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e29ed14f-af3a-4340-8f40-1c7c2cc207c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\alexe\\miniconda3\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:11: UserWarning: A NumPy version >=1.23.5 and <2.3.0 is required for this version of SciPy (detected version 2.3.0)\n",
      "  from scipy.sparse import csr_matrix, issparse\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<minsearch.append.AppendableIndex at 0x233d5173b90>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from minsearch import AppendableIndex\n",
    "\n",
    "index = AppendableIndex(\n",
    "    text_fields=[\"question\", \"text\", \"section\"],\n",
    "    keyword_fields=[\"course\"]\n",
    ")\n",
    "\n",
    "index.fit(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f8749d06-f715-4ab5-ac29-9f35bee27352",
   "metadata": {},
   "outputs": [],
   "source": [
    "def search(query):\n",
    "    boost = {'question': 3.0, 'section': 0.5}\n",
    "\n",
    "    results = index.search(\n",
    "        query=query,\n",
    "        filter_dict={'course': 'data-engineering-zoomcamp'},\n",
    "        boost_dict=boost,\n",
    "        num_results=5,\n",
    "        output_ids=True\n",
    "    )\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "de65e06c-8fb5-4bf8-b24e-09661ed36d7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'Can I still join the course?'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fa4be2c8-2f4e-4590-aef2-01220a82c8e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"\"\"\n",
    "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n",
    "Use only the facts from the CONTEXT when answering the QUESTION.\n",
    "\n",
    "<QUESTION>\n",
    "{question}\n",
    "</QUESTION>\n",
    "\n",
    "<CONTEXT>\n",
    "{context}\n",
    "</CONTEXT>\n",
    "\"\"\".strip()\n",
    "\n",
    "def build_prompt(query, search_results):\n",
    "    context = \"\"\n",
    "\n",
    "    for doc in search_results:\n",
    "        context = context + f\"section: {doc['section']}\\nquestion: {doc['question']}\\nanswer: {doc['text']}\\n\\n\"\n",
    "    \n",
    "    prompt = prompt_template.format(question=query, context=context).strip()\n",
    "    return prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7f8aa058-c45e-42b4-997e-40c8aa384b1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "search_results = search(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ac1f8d40-a251-4fbf-b210-f602df8def75",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = build_prompt(question, search_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5a86d4c6-02ed-404f-ba48-5b09ca16de4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "client = OpenAI()\n",
    "\n",
    "def llm(prompt):\n",
    "    response = client.chat.completions.create(\n",
    "        model='gpt-4o-mini',\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}]\n",
    "    )\n",
    "    return response.choices[0].message.content\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "1c6300ca-29d9-420e-9c4d-d49fd5088239",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer = llm(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "88130ac7-7f1a-44ea-9d93-a611131f2c27",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Yes, you can still join the course after the start date. Even if you don't register, you're eligible to submit the homeworks. However, be aware that there will be deadlines for turning in the final projects, so it's best not to leave everything for the last minute.\n"
     ]
    }
   ],
   "source": [
    "print(answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "377e1607-5dfc-422d-af86-2179e3e4e7f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rag(query):\n",
    "    search_results = search(query)\n",
    "    prompt = build_prompt(query, search_results)\n",
    "    answer = llm(prompt)\n",
    "    return answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "51608232-f6a9-4345-ba90-9f10054ad323",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'I’m sorry, but there is no information available in the context regarding how to patch KDE under FreeBSD.'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rag(\"How do I patch KDE under FreeBSD?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ee43fa1-ce00-4598-9b21-2c8a17d65c09",
   "metadata": {},
   "source": [
    "## \"Agentic\" RAG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "0cc51b91-5779-43b2-80c6-c7159afbb486",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"\"\"\n",
    "You're a course teaching assistant.\n",
    "\n",
    "You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.\n",
    "At the beginning the context is EMPTY.\n",
    "\n",
    "<QUESTION>\n",
    "{question}\n",
    "</QUESTION>\n",
    "\n",
    "<CONTEXT> \n",
    "{context}\n",
    "</CONTEXT>\n",
    "\n",
    "If CONTEXT is EMPTY, you can use our FAQ database.\n",
    "In this case, use the following output template:\n",
    "\n",
    "{{\n",
    "\"action\": \"SEARCH\",\n",
    "\"reasoning\": \"<add your reasoning here>\"\n",
    "}}\n",
    "\n",
    "If you can answer the QUESTION using CONTEXT, use this template:\n",
    "\n",
    "{{\n",
    "\"action\": \"ANSWER\",\n",
    "\"answer\": \"<your answer>\",\n",
    "\"source\": \"CONTEXT\"\n",
    "}}\n",
    "\n",
    "If the context doesn't contain the answer, use your own knowledge to answer the question\n",
    "\n",
    "{{\n",
    "\"action\": \"ANSWER\",\n",
    "\"answer\": \"<your answer>\",\n",
    "\"source\": \"OWN_KNOWLEDGE\"\n",
    "}}\n",
    "\"\"\".strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "cae701f4-342a-41fd-9ab6-46e3eceff7b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'Can I still join the course?'\n",
    "context = 'EMPTY'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f64b066f-3f6a-4a7a-aeab-bda815dcacdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = prompt_template.format(question=question, context=context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "08d87856-489e-4a8b-8818-271d0da83188",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_json = llm(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "350ac019-50cf-48a2-b47b-05fa13cf6ee1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "247bd647-f385-4ab7-84e7-4bf231c99021",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer = json.loads(answer_json)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "258864cf-0fff-46cc-9436-227b4c918b74",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'SEARCH'"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "answer['action']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "fe7e614a-42b3-4c81-9940-8eae90193e52",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_context(search_results):\n",
    "    context = \"\"\n",
    "\n",
    "    for doc in search_results:\n",
    "        context = context + f\"section: {doc['section']}\\nquestion: {doc['question']}\\nanswer: {doc['text']}\\n\\n\"\n",
    "    \n",
    "    return context.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "9658330f-cfd4-4400-9d73-dc149dc1672c",
   "metadata": {},
   "outputs": [],
   "source": [
    "search_results = search(question)\n",
    "context = build_context(search_results)\n",
    "prompt = prompt_template.format(question=question, context=context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "71737247-862f-48f9-a59c-a5f1d5ef575b",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_json = llm(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "907c7992-6dad-431e-8cec-3407d3a38f36",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "\"action\": \"ANSWER\",\n",
      "\"answer\": \"Yes, you can still join the course even after the start date. Even if you haven't registered, you're eligible to submit homework. Just keep in mind that there will be deadlines for the final projects, so it's a good idea to stay on top of the deadlines and not leave things until the last minute.\",\n",
      "\"source\": \"CONTEXT\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "print(answer_json)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95dbdbca-ebde-4dc4-8b4a-11d93232516f",
   "metadata": {},
   "source": [
    "## Agentic Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "53c272d4-d5a8-4e3f-9484-c3a5ffa4db3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def dedup(seq):\n",
    "    seen = set()\n",
    "    result = []\n",
    "    for el in seq:\n",
    "        _id = el['_id']\n",
    "        if _id in seen:\n",
    "            continue\n",
    "        seen.add(_id)\n",
    "        result.append(el)\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "aff2fbf0-9800-4d24-a5e7-083482657720",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"\"\"\n",
    "You're a course teaching assistant.\n",
    "\n",
    "You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.\n",
    "\n",
    "The CONTEXT is build with the documents from our FAQ database.\n",
    "SEARCH_QUERIES contains the queries that were used to retrieve the documents\n",
    "from FAQ to and add them to the context.\n",
    "PREVIOUS_ACTIONS contains the actions you already performed.\n",
    "\n",
    "At the beginning the CONTEXT is empty.\n",
    "\n",
    "You can perform the following actions:\n",
    "\n",
    "- Search in the FAQ database to get more data for the CONTEXT\n",
    "- Answer the question using the CONTEXT\n",
    "- Answer the question using your own knowledge\n",
    "\n",
    "For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.\n",
    "Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. \n",
    "\n",
    "Don't use search queries used at the previous iterations.\n",
    "\n",
    "Don't repeat previously performed actions.\n",
    "\n",
    "Don't perform more than {max_iterations} iterations for a given student question.\n",
    "The current iteration number: {iteration_number}. If we exceed the allowed number \n",
    "of iterations, give the best possible answer with the provided information.\n",
    "\n",
    "Output templates:\n",
    "\n",
    "If you want to perform search, use this template:\n",
    "\n",
    "{{\n",
    "\"action\": \"SEARCH\",\n",
    "\"reasoning\": \"<add your reasoning here>\",\n",
    "\"keywords\": [\"search query 1\", \"search query 2\", ...]\n",
    "}}\n",
    "\n",
    "If you can answer the QUESTION using CONTEXT, use this template:\n",
    "\n",
    "{{\n",
    "\"action\": \"ANSWER_CONTEXT\",\n",
    "\"answer\": \"<your answer>\",\n",
    "\"source\": \"CONTEXT\"\n",
    "}}\n",
    "\n",
    "If the context doesn't contain the answer, use your own knowledge to answer the question\n",
    "\n",
    "{{\n",
    "\"action\": \"ANSWER\",\n",
    "\"answer\": \"<your answer>\",\n",
    "\"source\": \"OWN_KNOWLEDGE\"\n",
    "}}\n",
    "\n",
    "<QUESTION>\n",
    "{question}\n",
    "</QUESTION>\n",
    "\n",
    "<SEARCH_QUERIES>\n",
    "{search_queries}\n",
    "</SEARCH_QUERIES>\n",
    "\n",
    "<CONTEXT> \n",
    "{context}\n",
    "</CONTEXT>\n",
    "\n",
    "<PREVIOUS_ACTIONS>\n",
    "{previous_actions}\n",
    "</PREVIOUS_ACTIONS>\n",
    "\"\"\".strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "210e426c-8135-4d13-bfe9-b60e21346ccd",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = 'how do I do well on module 1'\n",
    "max_iterations = 3\n",
    "iteration_number = 0\n",
    "search_queries = []\n",
    "search_results  = []\n",
    "previous_actions = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "67aee99b-850f-4ced-82e0-07f365284798",
   "metadata": {},
   "outputs": [],
   "source": [
    "context = build_context(search_results)\n",
    "\n",
    "prompt = prompt_template.format(\n",
    "    question=question,\n",
    "    context=context,\n",
    "    search_queries=\"\\n\".join(search_queries),\n",
    "    previous_actions='\\n'.join([json.dumps(a) for a in previous_actions]),\n",
    "    max_iterations=max_iterations,\n",
    "    iteration_number=iteration_number\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "329d87ee-3262-4f49-9760-7ecb3c931c32",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_json = llm(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "2c162430-5f56-40fd-9b8c-e95a31cc572c",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer = json.loads(answer_json)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "406e6100-a9ff-4871-889f-aaafda644f75",
   "metadata": {},
   "outputs": [],
   "source": [
    "previous_actions.append(answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "fd9e3965-8260-4394-a1f3-e8c8e9785e8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "keywords = answer['keywords']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "309ed6f7-bf4e-494f-9ff1-ffcae5d9b23d",
   "metadata": {},
   "outputs": [],
   "source": [
    "for kw in keywords:\n",
    "    search_queries.append(kw)\n",
    "    sr = search(kw)\n",
    "    search_results.extend(sr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "6edcdb00-5a54-4bbe-af72-e837c37d091a",
   "metadata": {},
   "outputs": [],
   "source": [
    "search_results = dedup(search_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "a84cf11d-6673-474a-b3fc-efadae0f067b",
   "metadata": {},
   "outputs": [],
   "source": [
    "iteration_number = 2\n",
    "\n",
    "context = build_context(search_results)\n",
    "\n",
    "prompt = prompt_template.format(\n",
    "    question=question,\n",
    "    context=context,\n",
    "    search_queries=\"\\n\".join(search_queries),\n",
    "    previous_actions='\\n'.join([json.dumps(a) for a in previous_actions]),\n",
    "    max_iterations=max_iterations,\n",
    "    iteration_number=iteration_number\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "487c3127-5954-4b19-83d1-795f0c0c1525",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer_json = llm(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "f770a0aa-b057-4197-92f5-2f1f4d33dbbb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "To do well in Module 1 focused on Docker and Terraform, consider the following strategies:\n",
      "\n",
      "1. **Understand Core Concepts**: Take time to understand the fundamental concepts of Docker and Terraform. Focus on how containers work in Docker and the infrastructure as code principles in Terraform.\n",
      "\n",
      "2. **Hands-On Practice**: Engage in hands-on projects. Try to build your own Docker containers and deploy them. Similarly, create Terraform scripts to automate infrastructure deployment.\n",
      "\n",
      "3. **Utilize Documentation**: Use the official documentation for both Docker and Terraform as a primary resource. They provide excellent examples and use cases that can reinforce your learning.\n",
      "\n",
      "4. **Engage with the Community**: Participate in forums or community discussions related to Docker and Terraform. You can learn a lot from others' experiences and solutions to common problems.\n",
      "\n",
      "5. **Study Regularly**: Create a study schedule that allows you to consistently review material and practice coding, rather than cramming before assessments.\n",
      "\n",
      "6. **Experiment**: Don’t hesitate to experiment with new features in both Docker and Terraform. This will not only enhance your understanding but also prepare you for working in real-world scenarios.\n",
      "\n",
      "In addition, ensure you troubleshoot common errors, like those related to installation or configuration of these tools, as familiarity with resolving issues will enhance your practical skills and confidence.\n"
     ]
    }
   ],
   "source": [
    "print(answer['answer'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46b985cc-1b98-4522-aeaf-56e6f2beaab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"what do I need to do to be successful at module 1?\"\n",
    "\n",
    "search_queries = []\n",
    "search_results = []\n",
    "previous_actions = []\n",
    "\n",
    "iteration = 0\n",
    "\n",
    "while True:\n",
    "    print(f'ITERATION #{iteration}...')\n",
    "\n",
    "    context = build_context(search_results)\n",
    "    prompt = prompt_template.format(\n",
    "        question=question,\n",
    "        context=context,\n",
    "        search_queries=\"\\n\".join(search_queries),\n",
    "        previous_actions='\\n'.join([json.dumps(a) for a in previous_actions]),\n",
    "        max_iterations=3,\n",
    "        iteration_number=iteration\n",
    "    )\n",
    "\n",
    "    print(prompt)\n",
    "\n",
    "    answer_json = llm(prompt)\n",
    "    answer = json.loads(answer_json)\n",
    "    print(json.dumps(answer, indent=2))\n",
    "\n",
    "    previous_actions.append(answer)\n",
    "\n",
    "    action = answer['action']\n",
    "    if action != 'SEARCH':\n",
    "        break\n",
    "\n",
    "    keywords = answer['keywords']\n",
    "    search_queries = list(set(search_queries) | set(keywords))\n",
    "    \n",
    "    for k in keywords:\n",
    "        res = search(k)\n",
    "        search_results.extend(res)\n",
    "\n",
    "    search_results = dedup(search_results)\n",
    "    \n",
    "    iteration = iteration + 1\n",
    "    if iteration >= 4:\n",
    "        break\n",
    "\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "47d41418-6290-4867-9347-0bd5f86a51dc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'action': 'ANSWER',\n",
       " 'answer': \"To be successful in Module 1, which focuses on Docker and Terraform, consider the following strategies: \\n\\n1. **Hands-on Practice**: Engage in hands-on projects to solidify your understanding of Docker and Terraform. Setting up your own Docker containers and writing Terraform scripts will help reinforce the concepts. \\n\\n2. **Resources**: Use official documentation for both Docker and Terraform extensively. They're comprehensive and can provide guidance on best practices. \\n\\n3. **Community Support**: Participate in forums and community groups related to Docker and Terraform. Platforms like Stack Overflow or specific Slack channels can be helpful for problem-solving and learning from others' experiences. \\n\\n4. **Time Management**: Allocate time regularly each week to study and practice. Break down the module into manageable sections and create a study schedule. \\n\\n5. **Study Groups**: Collaborate with peers for group study sessions. Explaining concepts to others can enhance your own understanding. \\n\\n6. **Experiment**: Don't hesitate to experiment with different features of Docker and Terraform. Breaking things and fixing them is a part of learning. \\n\\n7. **Seek Feedback**: If you’re working on assignments or projects, seek feedback from your instructors or peers to identify areas for improvement. \\n\\nBy combining these strategies, you’ll be better prepared to tackle the challenges presented in Module 1 effectively.\",\n",
       " 'source': 'OWN_KNOWLEDGE'}"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "0fc3a7bc-ee48-4888-9652-7f9a09dfedd2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iteration"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59fb5d73-0425-4f8b-98dd-113e88345aa6",
   "metadata": {},
   "source": [
    "## Function calling (\"tool use\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "c6aab03e-f653-4e82-9362-ddfba53ce286",
   "metadata": {},
   "outputs": [],
   "source": [
    "def search(query):\n",
    "    boost = {'question': 3.0, 'section': 0.5}\n",
    "\n",
    "    results = index.search(\n",
    "        query=query,\n",
    "        filter_dict={'course': 'data-engineering-zoomcamp'},\n",
    "        boost_dict=boost,\n",
    "        num_results=5,\n",
    "        output_ids=True\n",
    "    )\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "aa30ef3a-99e6-4e50-94ef-9d76a7431bb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "search_tool = {\n",
    "    \"type\": \"function\",\n",
    "    \"name\": \"search\",\n",
    "    \"description\": \"Search the FAQ database\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"query\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"Search query text to look up in the course FAQ.\"\n",
    "            }\n",
    "        },\n",
    "        \"required\": [\"query\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "de560ff4-7a38-4734-abe2-5149459eb873",
   "metadata": {},
   "outputs": [],
   "source": [
    "def do_call(tool_call_response):\n",
    "    function_name = tool_call_response.name\n",
    "    arguments = json.loads(tool_call_response.arguments)\n",
    "\n",
    "    f = globals()[function_name]\n",
    "    result = f(**arguments)\n",
    "\n",
    "    return {\n",
    "        \"type\": \"function_call_output\",\n",
    "        \"call_id\": tool_call_response.call_id,\n",
    "        \"output\": json.dumps(result, indent=2),\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "ea2ff680-e6f7-431a-a6f1-7fc6f47f184e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ResponseFunctionToolCall(arguments='{\"query\":\"module 1 tips for success\"}', call_id='call_FcpWXGZqHeLqMecDQwLCMPXq', name='search', type='function_call', id='fc_686401a3efbc8191b3646e3ad1218ac80e0676c3b7e4712d', status='completed'),\n",
       " ResponseFunctionToolCall(arguments='{\"query\":\"how to excel in module 1\"}', call_id='call_PNjiVZq3Fe66ODLrgup0SaRm', name='search', type='function_call', id='fc_686401a4e5448191a158f243fe1518b80e0676c3b7e4712d', status='completed')]"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "question = \"How do I do well in module 1?\"\n",
    "\n",
    "developer_prompt = \"\"\"\n",
    "You're a course teaching assistant. \n",
    "You're given a question from a course student and your task is to answer it.\n",
    "If you look up something in FAQ, convert the student question into multiple queries.\n",
    "\"\"\".strip()\n",
    "\n",
    "tools = [search_tool]\n",
    "\n",
    "chat_messages = [\n",
    "    {\"role\": \"developer\", \"content\": developer_prompt},\n",
    "    {\"role\": \"user\", \"content\": question}\n",
    "]\n",
    "\n",
    "response = client.responses.create(\n",
    "    model='gpt-4o-mini',\n",
    "    input=chat_messages,\n",
    "    tools=tools\n",
    ")\n",
    "response.output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "0f2d0c70-621d-453f-a519-35a0fcb37557",
   "metadata": {},
   "outputs": [],
   "source": [
    "calls = response.output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "c8776293-78b8-4180-93e0-5c399f8c6fde",
   "metadata": {},
   "outputs": [],
   "source": [
    "for call in calls:\n",
    "    result = do_call(call)\n",
    "    chat_messages.append(call)\n",
    "    chat_messages.append(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "4b4ce91c-6287-4c7d-bfe9-82edbfc591c3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ResponseOutputMessage(id='msg_686401ebe13c81918f6f3c84a1e7cec00e0676c3b7e4712d', content=[ResponseOutputText(annotations=[], text='To do well in Module 1, here are some tips based on common challenges and solutions:\\n\\n1. **Understand Your Environment**:\\n   - Ensure that you have set up your development environment correctly. This includes installing Docker and Terraform as indicated in the module resources.\\n\\n2. **Common Installation Issues**:\\n   - If you encounter errors such as `ModuleNotFoundError: No module named \\'psycopg2\\'`, try installing it via:\\n     ```bash\\n     pip install psycopg2-binary\\n     ```\\n   - If you\\'re still encountering issues, consider updating pip or conda:\\n     ```bash\\n     pip install --upgrade pip\\n     ```\\n     or\\n     ```bash\\n     conda update -n base -c defaults conda\\n     ```\\n\\n3. **PostgreSQL Connectivity**:\\n   - When connecting to PostgreSQL using SQLAlchemy, ensure your connection string is formatted correctly, for example:\\n     ```python\\n     conn_string = \"postgresql+psycopg://username:password@localhost:5432/your_database\"\\n     ```\\n   - If you face errors related to the connection, make sure PostgreSQL is installed and running.\\n\\n4. **Hands-on Practice**:\\n   - Engage in hands-on exercises that utilize Docker and Terraform. The more you practice, the more comfortable you\\'ll be with the concepts.\\n\\n5. **Use Provided Resources**:\\n   - Refer to any supplementary materials provided in the course for additional guidance on technical setups and exercises.\\n\\n6. **Seek Help if Needed**:\\n   - Don’t hesitate to reach out to instructors or peers if you run into any issues. The course community can be a valuable resource.\\n\\nFollowing these tips should enhance your understanding and performance in Module 1! If you have specific topics you\\'d like more detail on, feel free to ask.', type='output_text', logprobs=[])], role='assistant', status='completed', type='message')]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = client.responses.create(\n",
    "    model='gpt-4o-mini',\n",
    "    input=chat_messages,\n",
    "    tools=tools\n",
    ")\n",
    "response.output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "197f39e2-a611-4ccf-9720-746b6f9202ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "for entry in response.output:\n",
    "    chat_messages.append(entry)\n",
    "    print(entry.type)\n",
    "\n",
    "    if entry.type == 'function_call':      \n",
    "        result = do_call(entry)\n",
    "        chat_messages.append(result)\n",
    "    elif entry.type == 'message':\n",
    "        print(entry.text) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88efef06-cb56-442c-9217-3b12dcc69c51",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "9f1bf28a-6051-4007-99e7-de18ca996ab1",
   "metadata": {},
   "outputs": [],
   "source": [
    "developer_prompt = \"\"\"\n",
    "You're a course teaching assistant. \n",
    "You're given a question from a course student and your task is to answer it.\n",
    "\n",
    "Use FAQ if your own knowledge is not sufficient to answer the question.\n",
    "When using FAQ, perform deep topic exploration: make one request to FAQ,\n",
    "and then based on the results, make more requests.\n",
    "\n",
    "At the end of each response, ask the user a follow up question based on your answer.\n",
    "\"\"\".strip()\n",
    "\n",
    "chat_messages = [\n",
    "    {\"role\": \"developer\", \"content\": developer_prompt},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "09415738-bfab-4a58-a0ee-99fa25c2f343",
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " How do I do well in module 1?\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "function_call: ResponseFunctionToolCall(arguments='{\"query\":\"module 1 tips\"}', call_id='call_IED9lyUZOsS6ToxvZHLNQoN6', name='search', type='function_call', id='fc_686403087180819ebf792db8930e3f830594799b59703b57', status='completed')\n",
      "\n",
      "function_call: ResponseFunctionToolCall(arguments='{\"query\":\"module 1 successful strategies\"}', call_id='call_WAvKTixmfYyoBrF89lS6N46R', name='search', type='function_call', id='fc_68640309279c819ea24ded9b9639c7330594799b59703b57', status='completed')\n",
      "\n",
      "To excel in Module 1, which focuses on Docker and Terraform, here are some key strategies and tips based on common challenges faced by students:\n",
      "\n",
      "1. **Understand Docker Basics**: Start by familiarizing yourself with the Docker ecosystem, including images, containers, and Docker Compose. Having a solid grasp of these concepts will help you avoid common pitfalls.\n",
      "\n",
      "2. **Environment Setup**: Ensure you have a correctly set up local environment. This includes installing Docker and any necessary dependencies. For instance, if you encounter errors like `ModuleNotFoundError: No module named 'psycopg2'`, make sure to install it using:\n",
      "   ```bash\n",
      "   pip install psycopg2-binary\n",
      "   ```\n",
      "   or update as needed if you run into issues.\n",
      "\n",
      "3. **Jupyter Notebooks**: When working in Jupyter notebooks, ensure you can access the necessary libraries. If you run into module errors, consider installing the required packages directly in the notebook, like so:\n",
      "   ```python\n",
      "   !pip install psycopg2-binary\n",
      "   ```\n",
      "\n",
      "4. **Hands-On Practice**: Engage with the hands-on exercises provided in the module. Practicing with real commands and setups will cement your understanding.\n",
      "\n",
      "5. **Ask Questions**: If you're stuck on a particular exercise or concept, don't hesitate to reach out to peers or instructors. Collaborating with others can enhance your learning experience.\n",
      "\n",
      "6. **Review Code and Examples**: Go through the provided examples in the course and try to replicate them. This will give you practical insights into how to apply what you learn.\n",
      "\n",
      "7. **Error Handling**: Learn to read error messages carefully. For instance, if you see errors about missing modules, check your installation commands. Many common issues arise from simple typos or assumptions about package availability.\n",
      "\n",
      "If you keep these strategies in mind and actively engage with the materials, you'll improve your chances of success in Module 1. \n",
      "\n",
      "What specific aspects of Module 1 are you finding challenging?\n",
      "\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " Docker and Terraform\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "function_call: ResponseFunctionToolCall(arguments='{\"query\":\"Docker tips for Module 1\"}', call_id='call_zcxJlRqgjfMjavLOgVY1FSj5', name='search', type='function_call', id='fc_6864031f32a0819e850cbe6692b6ab160594799b59703b57', status='completed')\n",
      "\n",
      "function_call: ResponseFunctionToolCall(arguments='{\"query\":\"Terraform tips for Module 1\"}', call_id='call_4n2UHkf4UtPpg3XAM0V5WFfI', name='search', type='function_call', id='fc_6864031f61ec819eba1a01625d5a64430594799b59703b57', status='completed')\n",
      "\n",
      "To do well in Module 1 focusing on Docker and Terraform, here are some targeted tips for both technologies:\n",
      "\n",
      "### Docker Tips:\n",
      "1. **Basic Understanding**: Ensure you have foundational knowledge of Docker, such as how to create and manage containers, and how to use Docker Compose for multi-container applications.\n",
      "\n",
      "2. **Correct Installation**: If you encounter issues like `ModuleNotFoundError: No module named 'psycopg2'`, make sure that you're installing the required Python modules in your Docker environment. Update your Dockerfile to include:\n",
      "   ```Dockerfile\n",
      "   RUN python -m pip install psycopg2-binary\n",
      "   ```\n",
      "\n",
      "3. **Working with Jupyter Notebooks**: When executing code in Jupyter, install necessary modules using:\n",
      "   ```python\n",
      "   !pip install psycopg2-binary\n",
      "   ```\n",
      "\n",
      "4. **Consistent Environment**: Keep your development environment consistent with Docker. Always build and run your containers from the same local environment to minimize errors.\n",
      "\n",
      "### Terraform Tips:\n",
      "1. **Initialization**: When using Terraform, remember to run `terraform init` inside the correct working directory that contains your `.tf` files. If you run it outside, you'll receive errors about missing configurations.\n",
      "\n",
      "2. **Permissions**: Pay attention to permissions in your Google Cloud project. If you encounter a `403` error related to `storage.buckets.create`, ensure you're using the correct Project ID and that the service account has the necessary permissions.\n",
      "\n",
      "3. **Check System Time**: If you face issues with JWT tokens (like `invalid_grant` errors), it might be due to time desynchronization on your machine. Sync your system time using:\n",
      "   ```bash\n",
      "   sudo hwclock -s\n",
      "   ```\n",
      "\n",
      "4. **Error Logging**: When encountering errors, rely on error messages to guide you. For example, if Terraform mentions a state lock issue, review the relevant documentation or GitHub issues related to your error.\n",
      "\n",
      "### General Strategy:\n",
      "- **Practice**: The more you practice using Docker and Terraform, the more comfortable you'll become with their commands and functionalities.\n",
      "  \n",
      "- **Documentation**: Refer to the official Docker and Terraform documentation for troubleshooting and advanced tips. This can be invaluable in resolving specific issues you may face.\n",
      "\n",
      "If you need help with a specific Docker or Terraform command or concept, feel free to ask! What part of Docker or Terraform are you currently struggling with the most?\n",
      "\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " stop\n"
     ]
    }
   ],
   "source": [
    "while True: # main Q&A loop\n",
    "    question = input() # How do I do my best for module 1?\n",
    "    if question == 'stop':\n",
    "        break\n",
    "\n",
    "    message = {\"role\": \"user\", \"content\": question}\n",
    "    chat_messages.append(message)\n",
    "\n",
    "    while True: # request-response loop - query API till get a message\n",
    "        response = client.responses.create(\n",
    "            model='gpt-4o-mini',\n",
    "            input=chat_messages,\n",
    "            tools=tools\n",
    "        )\n",
    "\n",
    "        has_messages = False\n",
    "        \n",
    "        for entry in response.output:\n",
    "            chat_messages.append(entry)\n",
    "        \n",
    "            if entry.type == 'function_call':      \n",
    "                print('function_call:', entry)\n",
    "                print()\n",
    "                result = do_call(entry)\n",
    "                chat_messages.append(result)\n",
    "            elif entry.type == 'message':\n",
    "                print(entry.content[0].text)\n",
    "                print()\n",
    "                has_messages = True\n",
    "\n",
    "        if has_messages:\n",
    "            break"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b72e654-8f21-46a9-ac7a-ef67f7bd9120",
   "metadata": {},
   "source": [
    "## Multiple tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "af08b204-f01d-4ecf-8232-69ad68c80b3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "--2025-07-01 17:50:28--  https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 3485 (3.4K) [text/plain]\n",
      "Saving to: 'chat_assistant.py'\n",
      "\n",
      "     0K ...                                                   100%  434K=0.008s\n",
      "\n",
      "2025-07-01 17:50:28 (434 KB/s) - 'chat_assistant.py' saved [3485/3485]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "83cbac21-c207-4bbd-853e-e5c9dc5329da",
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_entry(question, answer):\n",
    "    doc = {\n",
    "        'question': question,\n",
    "        'text': answer,\n",
    "        'section': 'user added',\n",
    "        'course': 'data-engineering-zoomcamp'\n",
    "    }\n",
    "    index.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "15ecaa5c-f791-44c1-b09a-86e55caf9eb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "add_entry_description = {\n",
    "    \"type\": \"function\",\n",
    "    \"name\": \"add_entry\",\n",
    "    \"description\": \"Add an entry to the FAQ database\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"question\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The question to be added to the FAQ database\",\n",
    "            },\n",
    "            \"answer\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The answer to the question\",\n",
    "            }\n",
    "        },\n",
    "        \"required\": [\"question\", \"answer\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "76ca7181-427c-49e2-aa09-e486b13760f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'type': 'function',\n",
       "  'name': 'search',\n",
       "  'description': 'Search the FAQ database',\n",
       "  'parameters': {'type': 'object',\n",
       "   'properties': {'query': {'type': 'string',\n",
       "     'description': 'Search query text to look up in the course FAQ.'}},\n",
       "   'required': ['query'],\n",
       "   'additionalProperties': False}}]"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import chat_assistant\n",
    "\n",
    "tools = chat_assistant.Tools()\n",
    "tools.add_tool(search, search_tool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "b0366224-695e-4295-972d-4e6857fdb214",
   "metadata": {},
   "outputs": [],
   "source": [
    "tools.add_tool(add_entry, add_entry_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "8e8cdea1-6e33-4fb8-a2ff-d8d59637d28f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'type': 'function',\n",
       "  'name': 'search',\n",
       "  'description': 'Search the FAQ database',\n",
       "  'parameters': {'type': 'object',\n",
       "   'properties': {'query': {'type': 'string',\n",
       "     'description': 'Search query text to look up in the course FAQ.'}},\n",
       "   'required': ['query'],\n",
       "   'additionalProperties': False}},\n",
       " {'type': 'function',\n",
       "  'name': 'add_entry',\n",
       "  'description': 'Add an entry to the FAQ database',\n",
       "  'parameters': {'type': 'object',\n",
       "   'properties': {'question': {'type': 'string',\n",
       "     'description': 'The question to be added to the FAQ database'},\n",
       "    'answer': {'type': 'string', 'description': 'The answer to the question'}},\n",
       "   'required': ['question', 'answer'],\n",
       "   'additionalProperties': False}}]"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tools.get_tools()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "78144b8f-0b15-466a-8eca-46ad3d92ad24",
   "metadata": {},
   "outputs": [],
   "source": [
    "developer_prompt = \"\"\"\n",
    "You're a course teaching assistant. \n",
    "You're given a question from a course student and your task is to answer it.\n",
    "\n",
    "Use FAQ if your own knowledge is not sufficient to answer the question.\n",
    "\n",
    "At the end of each response, ask the user a follow up question based on your answer.\n",
    "\"\"\".strip()\n",
    "\n",
    "chat_interface = chat_assistant.ChatInterface()\n",
    "\n",
    "chat = chat_assistant.ChatAssistant(\n",
    "    tools=tools,\n",
    "    developer_prompt=developer_prompt,\n",
    "    chat_interface=chat_interface,\n",
    "    client=client\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "fb9b1ddc-eaa1-4a4c-b591-fe084b9d1eb5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "You: How do I do well in module 1?\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "            <details>\n",
       "            <summary>Function call: <tt>search({\"query\":\"do well in module 1\"})</tt></summary>\n",
       "            <div>\n",
       "                <b>Call</b>\n",
       "                <pre>ResponseFunctionToolCall(arguments='{\"query\":\"do well in module 1\"}', call_id='call_4iEnGse7DlYAvMy7ljBlaEDW', name='search', type='function_call', id='fc_6864051e058c81a3b2be4fc97f893c930d66a27d5be27c59', status='completed')</pre>\n",
       "            </div>\n",
       "            <div>\n",
       "                <b>Output</b>\n",
       "                <pre>[\n",
       "  {\n",
       "    \"text\": \"Issue:\\ne\\u2026\\nSolution:\\npip install psycopg2-binary\\nIf you already have it, you might need to update it:\\npip install psycopg2-binary --upgrade\\nOther methods, if the above fails:\\nif you are getting the \\u201c ModuleNotFoundError: No module named 'psycopg2' \\u201c error even after the above installation, then try updating conda using the command conda update -n base -c defaults conda. Or if you are using pip, then try updating it before installing the psycopg packages i.e\\nFirst uninstall the psycopg package\\nThen update conda or pip\\nThen install psycopg again using pip.\\nif you are still facing error with r pcycopg2 and showing pg_config not found then you will have to install postgresql. in MAC it is brew install postgresql\",\n",
       "    \"section\": \"Module 1: Docker and Terraform\",\n",
       "    \"question\": \"Postgres - ModuleNotFoundError: No module named 'psycopg2'\",\n",
       "    \"course\": \"data-engineering-zoomcamp\",\n",
       "    \"_id\": 112\n",
       "  },\n",
       "  {\n",
       "    \"text\": \"Following dbt with BigQuery on Docker readme.md, after `docker-compose build` and `docker-compose run dbt-bq-dtc init`, encountered error `ModuleNotFoundError: No module named 'pytz'`\\nSolution:\\nAdd `RUN python -m pip install --no-cache pytz` in the Dockerfile under `FROM --platform=$build_for python:3.9.9-slim-bullseye as base`\",\n",
       "    \"section\": \"Module 4: analytics engineering with dbt\",\n",
       "    \"question\": \"DBT - Error: No module named 'pytz' while setting up dbt with docker\",\n",
       "    \"course\": \"data-engineering-zoomcamp\",\n",
       "    \"_id\": 299\n",
       "  },\n",
       "  {\n",
       "    \"text\": \"create_engine('postgresql://root:root@localhost:5432/ny_taxi')  I get the error \\\"TypeError: 'module' object is not callable\\\"\\nSolution:\\nconn_string = \\\"postgresql+psycopg://root:root@localhost:5432/ny_taxi\\\"\\nengine = create_engine(conn_string)\",\n",
       "    \"section\": \"Module 1: Docker and Terraform\",\n",
       "    \"question\": \"Python - SQLALchemy - TypeError 'module' object is not callable\",\n",
       "    \"course\": \"data-engineering-zoomcamp\",\n",
       "    \"_id\": 124\n",
       "  },\n",
       "  {\n",
       "    \"text\": \"Error raised during the jupyter notebook\\u2019s cell execution:\\nengine = create_engine('postgresql://root:root@localhost:5432/ny_taxi').\\nSolution: Need to install Python module \\u201cpsycopg2\\u201d. Can be installed by Conda or pip.\",\n",
       "    \"section\": \"Module 1: Docker and Terraform\",\n",
       "    \"question\": \"Python - SQLAlchemy - ModuleNotFoundError: No module named 'psycopg2'.\",\n",
       "    \"course\": \"data-engineering-zoomcamp\",\n",
       "    \"_id\": 125\n",
       "  },\n",
       "  {\n",
       "    \"text\": \"You need to look for the Py4J file and note the version of the filename. Once you know the version, you can update the export command accordingly, this is how you check yours:\\n` ls ${SPARK_HOME}/python/lib/ ` and then you add it in the export command, mine was:\\nexport PYTHONPATH=\\u201d${SPARK_HOME}/python/lib/Py4J-0.10.9.5-src.zip:${PYTHONPATH}\\u201d\\nMake sure that the version under `${SPARK_HOME}/python/lib/` matches the filename of py4j or you will encounter `ModuleNotFoundError: No module named 'py4j'` while executing `import pyspark`.\\nFor instance, if the file under `${SPARK_HOME}/python/lib/` was `py4j-0.10.9.3-src.zip`.\\nThen the export PYTHONPATH statement above should be changed to `export PYTHONPATH=\\\"${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH\\\"` appropriately.\\nAdditionally, you can check for the version of \\u2018py4j\\u2019 of the spark you\\u2019re using from here and update as mentioned above.\\n~ Abhijit Chakraborty: Sometimes, even with adding the correct version of py4j might not solve the problem. Simply run pip install py4j and problem should be resolved.\",\n",
       "    \"section\": \"Module 5: pyspark\",\n",
       "    \"question\": \"Py4JJavaError - ModuleNotFoundError: No module named 'py4j'` while executing `import pyspark`\",\n",
       "    \"course\": \"data-engineering-zoomcamp\",\n",
       "    \"_id\": 323\n",
       "  }\n",
       "]</pre>\n",
       "            </div>\n",
       "            \n",
       "            </details>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <div><b>Assistant:</b></div>\n",
       "                <div><p>To excel in Module 1, here are some tips based on the course content:</p>\n",
       "<ol>\n",
       "<li>\n",
       "<p><strong>Understand the Basics</strong>: Ensure you grasp foundational concepts like Docker and Terraform, as these are essential for building a solid understanding in this module.</p>\n",
       "</li>\n",
       "<li>\n",
       "<p><strong>Practice Regularly</strong>: Engage with the practical assignments and exercises. Working hands-on will reinforce the concepts and tools covered.</p>\n",
       "</li>\n",
       "<li>\n",
       "<p><strong>Utilize Resources</strong>: Refer to the course materials, documentation, and any suggested readings. These are invaluable for deepening your comprehension.</p>\n",
       "</li>\n",
       "<li>\n",
       "<p><strong>Ask Questions</strong>: Don’t hesitate to reach out if you encounter difficulties or uncertainties. Engaging with peers or instructors can clarify your understanding.</p>\n",
       "</li>\n",
       "<li>\n",
       "<p><strong>Review Feedback</strong>: After completing sessions or tasks, take the time to review any feedback provided. This can guide your improvements in subsequent tasks.</p>\n",
       "</li>\n",
       "<li>\n",
       "<p><strong>Stay Organized</strong>: Keep your work organized, especially when handling multiple tools and configurations so you can easily trace back any errors that may arise.</p>\n",
       "</li>\n",
       "</ol>\n",
       "<p>Would you like more specific advice on any particular aspect of Module 1?</p></div>\n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "You: add this to the FAQ database\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "            <details>\n",
       "            <summary>Function call: <tt>add_entry({\"question\":\"How do I do well in module 1?\",\"an...)</tt></summary>\n",
       "            <div>\n",
       "                <b>Call</b>\n",
       "                <pre>ResponseFunctionToolCall(arguments='{\"question\":\"How do I do well in module 1?\",\"answer\":\"1. Understand the Basics: Ensure you grasp foundational concepts like Docker and Terraform.\\\\n2. Practice Regularly: Engage with practical assignments to reinforce concepts.\\\\n3. Utilize Resources: Refer to course materials, documentation, and suggested readings.\\\\n4. Ask Questions: Reach out if you encounter difficulties; engaging with peers or instructors can clarify your understanding.\\\\n5. Review Feedback: Take time to review feedback after completing tasks to guide improvements.\\\\n6. Stay Organized: Keep your work organized to easily trace back errors.\"}', call_id='call_bDeJ7ZqU9rKYPnSSPIgiIvV4', name='add_entry', type='function_call', id='fc_68640534ef8c81a389b44679839c6ce40d66a27d5be27c59', status='completed')</pre>\n",
       "            </div>\n",
       "            <div>\n",
       "                <b>Output</b>\n",
       "                <pre>null</pre>\n",
       "            </div>\n",
       "            \n",
       "            </details>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <div><b>Assistant:</b></div>\n",
       "                <div><p>I've added your question and answer to the FAQ database successfully!</p>\n",
       "<p>Do you have any other questions or topics you'd like to explore further?</p></div>\n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "You: stop\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chat ended.\n"
     ]
    }
   ],
   "source": [
    "chat.run()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "501458d2-2f15-4484-b8f1-f300a5e1f7e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'question': 'How do I do well in module 1?',\n",
       " 'text': '1. Understand the Basics: Ensure you grasp foundational concepts like Docker and Terraform.\\n2. Practice Regularly: Engage with practical assignments to reinforce concepts.\\n3. Utilize Resources: Refer to course materials, documentation, and suggested readings.\\n4. Ask Questions: Reach out if you encounter difficulties; engaging with peers or instructors can clarify your understanding.\\n5. Review Feedback: Take time to review feedback after completing tasks to guide improvements.\\n6. Stay Organized: Keep your work organized to easily trace back errors.',\n",
       " 'section': 'user added',\n",
       " 'course': 'data-engineering-zoomcamp'}"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index.docs[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "9b7d569e-ef87-4afe-839c-f873498d54d1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<minsearch.append.AppendableIndex at 0x233d5173b90>"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b265a810-7720-4f19-8897-c34fbba4bc55",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
